import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.metrics import roc_curve, auc #for model evaluation
from sklearn.metrics import classification_report #for model evaluation
from sklearn.metrics import confusion_matrix #for model evaluation
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, precision_recall_curve, \
average_precision_score
from inspect import signature

from IPython.display import Image 
from pydot import graph_from_dot_data
from sklearn.tree import export_graphviz
from six import StringIO

# Load the merged measurement results from the working directory.
masterprojekt_daten = pd.read_csv("./Messergebnisse_merged.csv")

# Remove the two columns that are not used further ('Unnamed: 0' is presumably
# an index column written out when the CSV was saved -- verify).
# `columns=` already selects the axis to drop from; the former additional
# `axis=0` was ignored by pandas and wrongly suggested that rows were dropped.
masterprojekt_daten = masterprojekt_daten.drop(columns=['Unnamed: 0', 'timeStamp'])

# Notebook-style display of the cleaned frame.
masterprojekt_daten

# Separate the target column from the remaining (feature) columns.
target_column = 'Label'
y = masterprojekt_daten[target_column]
x = masterprojekt_daten.drop(columns=[target_column])

# Notebook-style sanity check: shapes of the feature frame and target series.
x.shape, y.shape

((1377, 22), (1377,))

# Hold out 20 % of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=78678
)

# Shallow decision tree (at most three levels).
# The estimator gets its own seed as well: scikit-learn permutes the features
# randomly at every split, so without `random_state` ties between equally good
# splits can be resolved differently from run to run, making the fitted tree
# (and every metric derived from it) non-reproducible despite the seeded split.
dec_tree = tree.DecisionTreeClassifier(max_depth=3, random_state=78678)
dec_tree = dec_tree.fit(X_train, y_train)

# Visualise the fitted tree: write its Graphviz DOT description into an
# in-memory text buffer, parse that text with pydot and show the rendered PNG.
dot_data = StringIO()
export_graphviz(
    dec_tree,
    out_file=dot_data,
    feature_names=X_train.columns,  # label the split nodes with the column names
)

# graph_from_dot_data returns a sequence; exactly one graph is expected here,
# so the unpacking fails loudly if that is not the case.
parsed_graphs = graph_from_dot_data(dot_data.getvalue())
(graph, ) = parsed_graphs

# Notebook-style inline display of the rendered tree.
Image(graph.create_png())

# Hard class predictions and per-class probabilities for the held-out rows.
y_pred = dec_tree.predict(x_test)
y_probs = dec_tree.predict_proba(x_test)

# Cross-validate on the complete data set (x, y).
# The number of folds is pinned explicitly: relying on the default emits a
# FutureWarning on the scikit-learn release this was run with and silently
# switches from 3 to 5 folds on newer releases, which would make the reported
# scores incomparable. cv=3 keeps the behaviour the results were produced with.
print('Cross validation of the Data set: ', cross_val_score(dec_tree, x, y, cv=3, verbose=3))

# Hold-out metrics computed from the hard predictions.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))

[CV]  ................................................................
[CV] .................................... , score=0.804, total=   0.0s
[CV]  ................................................................
[CV] .................................... , score=0.473, total=   0.0s
[CV]  ................................................................
[CV] .................................... , score=0.686, total=   0.0s
Cross validation of the Data set:  [0.80434783 0.47276688 0.68558952]
Accuracy: 0.8659420289855072
Precision: 0.5714285714285714
Recall: 0.7111111111111111

C:\Users\MLDigitalLab\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.
  warnings.warn(CV_WARNING, FutureWarning)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s finished

# ROC analysis on the held-out set.
# Scores are taken from the second column of the predict_proba output
# (presumably the positive class, label 1 -- confirm via dec_tree.classes_).
probabilities = y_probs[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, probabilities)
roc_auc = auc(fpr, tpr)

# Legend entry carrying the area under the curve, rounded to two decimals.
auc_label = 'AUC = %0.2f' % roc_auc

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'y', label=auc_label)
plt.legend(loc='lower right')
# Dashed diagonal as a visual reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

# Precision-recall analysis on the held-out set.
# The curve and the average precision must be computed from continuous scores,
# not from the hard 0/1 predictions: with `y_pred` there is only one effective
# threshold, so the "curve" degenerates to a single operating point and the
# reported AP is understated. Use the predicted probability of the second
# class instead (same scores as used for the ROC curve).
positive_scores = y_probs[:, 1]
precision, recall, threshold = precision_recall_curve(y_test, positive_scores)
average_precision = average_precision_score(y_test, positive_scores)

# Only pass `step` to fill_between when the installed matplotlib supports it.
step_kwargs = ({'step': 'post'} if 'step' in signature(plt.fill_between).parameters else {})
plt.step(recall, precision, color='r', alpha=0.2, where='post')
plt.fill_between(recall, precision, alpha=0.2, color='r', **step_kwargs)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.0])
plt.xlim([0.0, 1.0])
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))

Text(0.5, 1.0, '2-class Precision-Recall curve: AP=0.45')

import sklearn

# Confusion matrix of the held-out predictions, drawn as an annotated heatmap.
matrix = confusion_matrix(y_test, y_pred)
# NOTE(review): not used in the visible code; despite its name this holds all
# column names of the data frame, not the class names.
class_names = masterprojekt_daten.columns
ax = plt.subplot()

# Tick captions for the two classes, shared by both axes.
tick_labels = ['i.O.', 'n.I.O.']

sns.heatmap(matrix, annot=True, fmt='d', ax=ax, annot_kws={"size": 16})
ax.set(xlabel='Predicted labels', ylabel='True labels', title='Confusion Matrix')
ax.xaxis.set_ticklabels(tick_labels)
ax.yaxis.set_ticklabels(tick_labels)

[Text(0, 0.5, 'i.O.'), Text(0, 1.5, 'n.I.O.')]

	Energy_Savings.Active_Power_Spindle	Energy_Savings.Active_Power_Z	Main_Spindle.Actual_Position_MCS	Main_Spindle.Actual_Power	Main_Spindle.Actual_Speed_Rate	Main_Spindle.MPC_Peak	Main_Spindle.MPC_Veff_Total	Main_Spindle.Temperature_Spindle	Tool_Control_Center.Axial_Force_Tension	Tool_Control_Center.Bending_Moment	...	Y1_Axis.Temperature_Y_Slide	Z1_Axis.Actual_Feed_Rate	Z1_Axis.Actual_Position_MCS	Z1_Axis.Actual_Power	Energy_Savings.Active_Power_X	Tool_Control_Center.Axial_Force_Compression	X1_Axis.Actual_Feed_Rate	X1_Axis.Actual_Position_MCS	A1_Axis.Actual_Power	Label
0	3.126359	53.527900	179.816218	0.500000	317.379310	1.000000	0.50	28.105000	1.258537	2.500000	...	24.765000	354.875000	-304.447544	13.739130	416.853000	0.052632	1011.666667	367.525	5.5	0
1	1.763218	-223.576625	184.350446	0.507463	317.500000	0.666667	0.50	28.067143	0.674074	0.850000	...	24.766667	8.800000	-302.581802	13.766667	416.853000	0.052632	1011.666667	367.525	5.5	0
2	1.398177	382.341625	181.430268	0.500000	317.300000	2.333333	0.50	28.067143	0.047826	0.478261	...	24.800000	-156.000000	-302.535946	12.562500	-60.904000	0.048148	3049.500000	384.030	5.5	0
3	1.065278	700.204571	177.692973	0.492308	317.128205	1.666667	1.00	28.032857	0.050000	0.500000	...	24.800000	26.000000	-303.418349	17.350000	-111.657000	0.054286	0.000000	397.960	5.5	0
4	0.225588	-132.687143	175.616696	0.508475	317.062500	1.500000	1.00	28.000000	0.050000	0.529412	...	24.800000	67.909091	-302.540270	16.608696	4.060500	0.052000	4358.500000	410.085	5.5	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1372	-16.873760	303.448700	180.035357	0.500000	951.461538	1.000000	0.75	27.290000	0.000000	0.500000	...	23.305000	-72.166667	-323.262222	13.600000	0.000000	0.150000	1185.000000	410.090	6.5	0
1373	-13.696250	627.496222	171.974828	0.500000	951.500000	1.600000	0.00	27.290000	0.000000	0.333333	...	23.308000	-9.666667	-323.293333	17.700000	-261.210000	0.150000	14.500000	425.330	6.5	1
1374	-15.487385	56.574333	189.220345	0.533333	951.500000	1.750000	0.80	27.290000	0.000000	0.666667	...	23.305000	249.333333	-319.254444	24.181818	16.692333	0.150000	1742.500000	433.260	6.5	0
1375	-16.962231	-276.777900	169.227931	0.500000	951.500000	0.500000	0.00	27.290000	0.000000	0.500000	...	23.305000	304.222222	-320.037857	26.875000	418.206500	0.150000	4252.000000	450.395	6.5	0
1376	-14.012231	46.118455	174.061034	0.500000	951.538462	1.400000	0.80	27.290000	0.000000	0.500000	...	23.300000	262.500000	-323.282222	17.133333	0.000000	0.100000	3980.500000	468.295	6.5	0